************************************Make Economic Growth Bartik - Micro***********************************************************************

clear all

* ---------------------------------------------------------------------------
* Sector shares of GSP by state, averaged over each state's first two biennia,
* summarised by principal components. Output: sector-econ.dta with one row per
* state (statename, gsp_initial_A..I, pc1_sectors..pc4_sectors).
* ---------------------------------------------------------------------------
use "$folder/data/input/gsp-data", clear
drop index
reshape wide gsp, i(statename year) j(sector)  string

* Remove DC together with the regional and national aggregate rows
foreach area in "District of Columbia" "Far West" "Great Lakes" "Mideast" "New England" "Plains" "Rocky Mountain" "Southeast" "Southwest" "United States" {
    drop if statename == "`area'"
}

* biennium includes stated year and previous year
* (odd years are rolled forward to the following even year)
gen biennium =  year + mod(year,2)
replace year = biennium

* Average the two years of each biennium
collapse (mean) gspA-gspI , by(statename year)

egen state = group(statename)
tsset state year, delta(2)

local sectors A B C D E F G H I

foreach x of local sectors {
    * fill a missing biennium with the previous biennium's value
    replace gsp`x' = L.gsp`x' if missing(gsp`x')

    gen log_gsp_`x' = log(1+gsp`x')
    gen growth_`x' = D.log_gsp_`x'
}

* State total across sectors, counting a missing sector as zero
gen gsp_total = 0
foreach x of local sectors {
    gen gsp_nonmiss_`x' = gsp`x'
    replace gsp_nonmiss_`x' = 0 if gsp_nonmiss_`x' == .
    replace gsp_total = gsp_total + gsp_nonmiss_`x'
}

* Shares use the raw sector value, so a missing sector keeps a missing share
foreach x of local sectors {
    gen gsp_share_`x' = gsp`x' / gsp_total
}

* Initial share = mean of the share in the state's first two biennia.
* Ordering within state is stated explicitly via (year) so that [1] and [2]
* are the two earliest periods regardless of the current sort order.
foreach x of local sectors {
    bysort state (year): gen gsp_initial1_`x' = gsp_share_`x'[1]
    bysort state (year): gen gsp_initial2_`x' = gsp_share_`x'[2]
    gen gsp_initial_`x' = (gsp_initial1_`x' + gsp_initial2_`x')/2
}

* gsp_initial_* matches only the averaged shares (not gsp_initial1_/gsp_initial2_)
pca gsp_initial_*
predict pc1_sectors pc2_sectors pc3_sectors pc4_sectors, score

* All kept variables are constant within state, so this leaves one row per state
keep statename pc1_sectors pc2_sectors pc3_sectors pc4_sectors gsp_initial_*
duplicates drop

save sector-econ, replace


************************************Add Nunn's Data***********************************************************************

*1997 benchmark year(1997 NAICS)

use "$folder/data/input/contract_intensity_IO_1997", clear

* Industry identifier = first three characters of industry_code
gen industry_naics = substr(industry_code,1,3)

* Average both measures within each 3-character industry
collapse (mean) frac_lib_diff frac_lib_not_homog, by(industry_naics)

* Two-quantile split of each measure: <measure>2 is 1 at or below the median, 2 above
foreach measure of varlist frac_lib_diff frac_lib_not_homog {
    xtile `measure'2 = `measure', nquantiles(2)
}

save contract_intensity_1997, replace


************************************Add Sector GSP Data***********************************************************************

* Dorn's crosswalk from SIC to 1997 NAICS 3 digits

use "$folder/data/input/cw_n97_s87", clear

* String versions of both codes; 3-character SIC codes get a leading zero
tostring naics6, gen(naics6_s)
tostring sic4, gen(sic4_s)
replace sic4_s = "0" + sic4_s if strlen(sic4_s) == 3

* Coarsen to the first 3 characters (NAICS) and first 2 characters (SIC)
gen industry_naics = substr(naics6_s,1,3)
gen industry_sic = substr(sic4_s,1,2)

* Keep only rows whose weight is not below 1
keep if weight >= 1

keep industry_naics industry_sic

* Keep only NAICS3-SIC2 pairs that occur exactly once.
* NOTE(review): every copy of a repeated pair is removed here, not just the
* surplus copies - confirm this is intended rather than `duplicates drop`.
duplicates tag industry_naics industry_sic, gen(dup)
keep if dup == 0
drop dup

*manual checking

drop if inlist(industry_naics, "233", "311", "322", "326", "334", "442", "561", "511")

drop if industry_sic == "80"

save cross-walk_ind, replace


*GSP data sectors 1963-1997 (SIC to NAICS with crosswalk and merge with Nunn data)

clear

import delimited "$folder/data/input/gsp_sic_all.csv", varnames(1) 

keep if componentname=="Gross domestic product (GDP) by state"

* Raw value columns v9 ... v43
local rawcols
forvalues k = 9/43 {
    local rawcols `rawcols' v`k'
}

keep geoname industryclassification `rawcols'

drop if missing(geoname)
rename geoname statename

* Remove DC together with the regional and national aggregate rows
foreach area in "District of Columbia" "Far West" "Great Lakes" "Mideast" "New England" "Plains" "Rocky Mountain" "Southeast" "Southwest" "United States" {
    drop if statename == "`area'"
}

rename industryclassification sector

* Rename each raw column to gsp<variable label>
* (the loops below expect the labels to be the years 1963-1997)
foreach col of local rawcols {
    local lbl : variable label `col'
    rename `col' gsp`lbl'
}

* Names gsp1963 ... gsp1997
local gspyears
forvalues y = 1963/1997 {
    local gspyears `gspyears' gsp`y'
}

* Convert each year column to numeric; non-numeric text becomes missing
foreach g of local gspyears {
    gen `g'_r = real(`g')
    drop `g'
    rename `g'_r `g'
}

drop if sector=="..."

* Split the combined "[01-02]" row into codes 01 and 02, half the value each
expand 2 if sector=="[01-02]", gen(dupindicator)
gen sector_n = cond(dupindicator==0, "01", "02") if sector=="[01-02]"

foreach g of local gspyears {
    replace `g' = `g'/2 if inlist(sector_n, "01", "02")
}

* Split the combined "[07-09]" row into codes 07, 08 and 09, a third each
expand 3 if sector=="[07-09]", gen(dupindicator1)
bysort sector statename: gen newid = _n

local k = 0
foreach code in 07 08 09 {
    local ++k
    replace sector_n = "`code'" if sector=="[07-09]" & newid==`k'
}

foreach g of local gspyears {
    replace `g' = `g'/3 if inlist(sector_n, "07", "08", "09")
}

drop dupindicator* newid

* Industry code: first two characters of sector, overridden by the split code
gen industry_sic = substr(sector,1,2)
replace industry_sic = sector_n if sector_n!=""
drop if length(industry_sic) < 2

collapse (sum) gsp*, by(industry_sic statename)

reshape long gsp, i(statename industry_sic) j(year)

* statastates is a user-written command; the lines below rely on it having
* added state_abbrev, state_fips and _merge
statastates, name(statename)
drop statename state_fips _merge

drop if missing(year)

* Attach NAICS codes through the SIC-to-NAICS crosswalk saved earlier
mmerge industry_sic using cross-walk_ind, type(n:n) 

drop if industry_naics=="" | year==.
drop _merge 

save gsp_ind.dta, replace

* Attach the contract-intensity measures by NAICS code
merge m:1 industry_naics using "contract_intensity_1997", nogenerate

drop if frac_lib_not_homog==. | year==.

* sector_spec = string form of the two-quantile group of frac_lib_not_homog
tostring frac_lib_not_homog2, gen(sector_spec)

save sector-econ1963, replace

* Total GSP per state-year within each sector_spec group, one column per group
collapse (sum) gsp, by(sector_spec state_abbrev year)
reshape wide gsp, i(state_abbrev year) j(sector_spec)  string

drop if missing(year)

* biennium includes stated year and previous year
gen biennium =  year + mod(year,2)
replace year = biennium

collapse (mean) gsp* , by(state_abbrev year)

egen state = group(state_abbrev)
tsset state year, delta(2)

forvalues x = 1/2 {
    * fill a missing biennium with the previous biennium's value
    replace gsp`x' = L.gsp`x' if missing(gsp`x')

    gen log_gsp_`x' = log(1+gsp`x')
    gen growth_`x' = D.log_gsp_`x'
}

drop if year==1998

save sector-spec1963, replace

*GSP data sectors 1997-2015 (NAICS to NAICS to merge with Nunn data)

clear
import delimited "$folder/data/input/gsp_naics_all.csv", varnames(1) 

keep if componentname=="Gross domestic product (GDP) by state"

* Raw value columns v9 ... v27
local rawcols
forvalues k = 9/27 {
    local rawcols `rawcols' v`k'
}

keep geoname industryclassification `rawcols'

drop if missing(geoname)

rename geoname statename

* Remove DC together with the regional and national aggregate rows
foreach area in "District of Columbia" "Far West" "Great Lakes" "Mideast" "New England" "Plains" "Rocky Mountain" "Southeast" "Southwest" "United States" {
    drop if statename == "`area'"
}

rename industryclassification sector

* Rename each raw column to gsp<variable label>
* (the loops below expect the labels to be the years 1997-2015)
foreach col of local rawcols {
    local lbl : variable label `col'
    rename `col' gsp`lbl'
}

* Names gsp1997 ... gsp2015
local gspyears
forvalues y = 1997/2015 {
    local gspyears `gspyears' gsp`y'
}

* Convert each year column to numeric; non-numeric text becomes missing
foreach g of local gspyears {
    gen `g'_r = real(`g')
    drop `g'
    rename `g'_r `g'
}

drop if sector=="..."

* sector_n holds the individual NAICS code assigned to rows split out of a
* combined sector; it stays empty for every other row
gen str3 sector_n = ""

* Two-way splits: each combined "lo-hi" row becomes two rows, half the value each
foreach pair in "111 112" "311 312" "313 314" "315 316" {
    local lo : word 1 of `pair'
    local hi : word 2 of `pair'

    expand 2 if sector == "`lo'-`hi'", gen(copy_flag)
    replace sector_n = "`lo'" if sector == "`lo'-`hi'" & copy_flag == 0
    replace sector_n = "`hi'" if sector == "`lo'-`hi'" & copy_flag == 1

    foreach g of local gspyears {
        replace `g' = `g'/2 if inlist(sector_n, "`lo'", "`hi'")
    }

    drop copy_flag
}

* Three-way split of "113-115": a third of the value to each of 113, 114, 115.
* copy_id numbers the copies within sector-state (this relies on the input
* having one row per sector and state - TODO confirm).
expand 3 if sector == "113-115"
bysort sector statename: gen copy_id = _n
replace sector_n = "113" if sector == "113-115" & copy_id == 1
replace sector_n = "114" if sector == "113-115" & copy_id == 2
replace sector_n = "115" if sector == "113-115" & copy_id == 3
drop copy_id

foreach g of local gspyears {
    replace `g' = `g'/3 if inlist(sector_n, "113", "114", "115")
}

* Three-way split of "487-488, 492".
* The copy index must be generated AFTER this expand: an index created before
* it is copied unchanged onto the new rows, so the three copies could not be
* told apart and codes 488 and 492 would never be assigned.
expand 3 if sector == "487-488, 492"
bysort sector statename: gen copy_id = _n
replace sector_n = "487" if sector == "487-488, 492" & copy_id == 1
replace sector_n = "488" if sector == "487-488, 492" & copy_id == 2
replace sector_n = "492" if sector == "487-488, 492" & copy_id == 3
drop copy_id

foreach g of local gspyears {
    replace `g' = `g'/3 if inlist(sector_n, "487", "488", "492")
}

* no need for more - Nunn's data only up to NAICS 5112

* Industry code: first three characters of sector, overridden by the split code
gen industry=substr(sector,1,3)

replace industry=sector_n if sector_n!=""

* Discard rows whose code is shorter than three characters or is still a
* combined/range label
drop if length(industry) < 3

drop if strpos(industry,",")>0

drop if strpos(industry,"-")>0

drop gsp2015

collapse (sum) gsp*, by(industry statename)

reshape long gsp, i(statename industry) j(year)

* statastates is a user-written command; the lines below rely on it having
* added state_abbrev, state_fips and _merge
statastates, name(statename)

drop statename state_fips

drop _merge

rename industry industry_naics 

* Attach the contract-intensity measures by NAICS code
merge m:1 industry_naics using "contract_intensity_1997"
drop _merge

drop if frac_lib_not_homog==.

drop if year==.

* sector_spec = string form of the two-quantile group of frac_lib_not_homog
tostring frac_lib_not_homog2, gen(sector_spec)

save sector-econ1997, replace

* Total GSP per state-year within each sector_spec group, one column per group
collapse (sum) gsp, by(sector_spec state_abbrev year)

reshape wide gsp, i(state_abbrev year) j(sector_spec)  string

* biennium includes stated year and previous year
gen biennium =  year + mod(year,2)
replace year = biennium

collapse (mean) gsp* , by(state_abbrev year)

egen state = group(state_abbrev)
tsset state year, delta(2)

forvalues x = 1/2 {
    * fill a missing biennium with the previous biennium's value
    replace gsp`x' = L.gsp`x' if missing(gsp`x')

    gen log_gsp_`x' = log(1+gsp`x')
    gen growth_`x' = D.log_gsp_`x'
}

save sector-spec1997, replace

* Stack the 1963-1997 (SIC-based) panel underneath the NAICS-based panel
append using sector-spec1963

drop if year==.

save sector-spec, replace
